import openai

# Connection settings for an Xinference server exposing an
# OpenAI-compatible HTTP API.
xinference_url = "10.26.33.159"
xinference_port = 9997
model_entities_uid = "tt_Meta-Llama-3.1-8B-Instruct"

# The api_key value is a placeholder — the client library rejects an
# empty key, as the literal itself notes.
client = openai.Client(
    base_url="http://{}:{}/v1".format(xinference_url, xinference_port),
    api_key="cannot be empty",
)

# Call the chat-completion endpoint.
response = client.chat.completions.create(
    model=model_entities_uid,
    messages=[{"role": "user", "content": "Hello"}],
    temperature=0.7,
    max_tokens=100,
)

# Print the generated text.
print(response.choices[0].message.content)
